import pandas as pd
df = {}
from clustergrammer2 import net
import ipywidgets as widgets
import numpy as np
from bqplot import pyplot as plt
import bqplot
from glob import glob
# List the candidate input files under ../data/big_data. The returned lists
# are not assigned to anything; they are only useful as displayed output.
glob('../data/big_data/*.txt')
glob('../data/big_data/*.csv')
See http://welikesharingdata.blob.core.windows.net/forshare/index.html
%%time
# Read the expression table from disk and keep it under the 'exp' key.
df['exp'] = pd.read_csv(
    '../data/big_data/Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv'
)

# Relabel every row as 'C-<original index label>'.
new_rows = ['C-{}'.format(x) for x in df['exp'].index.tolist()]
df['exp'].index = new_rows

print(df['exp'].shape)
df['exp'].head()
# Column names of the expression table, in file order.
cols = df['exp'].columns.tolist()
cols

# Columns 1..29 (slice 1:30) are the ones used as expression columns below.
exp_cols = cols[1:30]
exp_cols

# Collect the distinct values of every column, then report how many distinct
# values each column holds.
unique_dict = {
    col_name: list(df['exp'][col_name].unique()) for col_name in cols
}
for inst_col, inst_list_unique in unique_dict.items():
    inst_num_unique = len(inst_list_unique)
    print(inst_col, inst_num_unique)
BALBc: normal tissue. MRL/lpr: spleen from animals with systemic autoimmune disease.
Start with tile 'BALBc-1_X01_Y01'.
# Gather the row labels whose 'sample_Xtile_Ytile' value matches one of the
# requested tiles, in the order the tiles are listed.
ser_tile = df['exp']['sample_Xtile_Ytile']
keep_rows = []
for inst_tile in ['BALBc-1_X01_Y01']:
    ser_found = ser_tile.loc[ser_tile == inst_tile]
    ser_found.shape
    keep_rows += ser_found.index.tolist()

# Keep only those rows and flip the table so each kept row becomes a column.
df['tile'] = df['exp'].loc[keep_rows].T
df['tile'].shape
# Pull the 'Imaging phenotype cluster ID' row out of the transposed tile table.
# NOTE(review): `cats` and `new_cols` are only consumed by the commented-out
# relabelling code below; nothing active in this section reads them.
cats = df['tile'].loc['Imaging phenotype cluster ID']
new_cols = []
# Disabled experiment: replace each column label with a
# (label, 'Cat: ID-<cluster id>') tuple built from `cats`.
# cols = df['tile'].columns.tolist()
# for index in range(len(cols)):
# new_col = (cols[index], 'Cat: ID-' + str(cats[index]))
# new_cols.append(new_col)
# df['tile'].columns = new_cols
df['tile'].head()
# Restrict the tile table to the expression rows chosen in `exp_cols`.
df['tile-exp-ini'] = df['tile'].loc[exp_cols]
df['tile-exp-ini'].shape

# Total of every column, largest first, plotted for a quick look.
ser_sum = df['tile-exp-ini'].sum(axis=0)
ser_sum = ser_sum.sort_values(ascending=False)
ser_sum.plot(grid=True)
print(ser_sum.shape)

# Drop columns whose total is 100000 or more ...
ser_sum = ser_sum.loc[ser_sum < 100000]
print(ser_sum.shape)

# ... and columns whose total is zero or negative.
ser_sum = ser_sum.loc[ser_sum > 0]
print(ser_sum.shape)

# Surviving column labels, still ordered by descending total.
keep_cells = ser_sum.index.tolist()
df['tile-exp'] = df['tile-exp-ini'][keep_cells]
df['tile-exp'].shape
# Floor: overwrite every negative entry with 0. This is an in-place
# boolean-mask assignment on the frame stored under 'tile-exp'.
df['tile-exp'][df['tile-exp'] < 0] = 0
# Summary statistics with the rows of 'tile-exp' turned into columns.
# Computed after the floor above but before the cap below; the result is
# not stored.
df['tile-exp'].transpose().describe()
# Disabled alternative: arcsinh transform with a cofactor of 5.
# df['tile-exp-ash'] = np.arcsinh(df['tile-exp']/5)
# Cap: overwrite every entry above 5000 with 5000 (again in place).
df['tile-exp'][df['tile-exp'] > 5000] = 5000
# Disabled histogram of all values.
# NOTE(review): `get_values()` no longer exists in pandas >= 1.0; use
# `to_numpy()` if this is re-enabled.
# ser_vals = pd.Series(df['tile-exp'].get_values().flatten())
# ser_vals.hist(bins=100)
df['tile-exp'].shape
# Take the 'X.X' and 'Y.Y' rows for the kept columns and flip them so the
# result has one row per kept column and the two coordinates as columns.
coord_rows = df['tile'].loc[['X.X', 'Y.Y']]
df['tile-loc'] = coord_rows[keep_cells].T
df['tile-loc'].shape

# Store the coordinates as integers.
df['tile-loc'] = df['tile-loc'].astype('int')
def set_expression_opacity(inst_gene, expression=None, mark=None):
    """Set per-point opacities from one row of an expression table.

    Each value in the selected row is divided by the row's maximum and the
    resulting list is assigned to ``mark.default_opacities``.

    Args:
        inst_gene: Row label to look up in ``expression``.
        expression: Table to read the row from. Defaults to the
            module-level ``df['tile-exp']``.
        mark: Object whose ``default_opacities`` attribute is set. Defaults
            to the module-level ``scatter``.

    Returns:
        None. The result is written to ``mark.default_opacities``.

    Raises:
        KeyError: If ``inst_gene`` is not a row label of ``expression``.
    """
    if expression is None:
        expression = df['tile-exp']
    if mark is None:
        mark = scatter

    # Cast to float so rows stored with object dtype divide element-wise
    # without hitting Python's ZeroDivisionError.
    levels = expression.loc[inst_gene].astype(float)

    # Compute the maximum once instead of once per element.
    peak = levels.max()

    if peak > 0:
        opacities = (levels / peak).tolist()
    else:
        # No positive maximum to scale by (all zero, or an all-NaN or empty
        # row): make every point fully transparent instead of producing
        # NaN opacities or raising.
        opacities = [0.0] * len(levels)

    mark.default_opacities = opacities
# Create the figure and a tooltip that shows each point's 'name' field.
fig = plt.figure(title='Scatter')
def_tt = bqplot.Tooltip(fields=['name'], formats=[''])

# One point per row of the coordinates table, named after its row label.
scatter = plt.scatter(
    df['tile-loc']['X.X'],
    df['tile-loc']['Y.Y'],
    figsize=(20, 10),
    ylim=(0, 1000),
    xlim=(0, 1000),
    stroke='black',
    tooltip=def_tt,
    names=df['tile-loc'].index.tolist(),
    display_names=False,
)

# Minimum on-screen size: 900px wide, height derived with a 1.2 ratio.
inst_width = 900
fig.layout.min_height = '{}px'.format(inst_width / 1.2)
fig.layout.min_width = '{}px'.format(inst_width)

# Earlier fixed-opacity experiments, kept for reference:
# scatter.default_opacities = [0.5]
# scatter.default_opacities = [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.2, 0.1, 0.1]

# Initial look: opacity from 'NKp46', large red points.
set_expression_opacity('NKp46')
scatter.default_size = 250
scatter.colors = ['red']

# Hand the expression table to clustergrammer2, then show both widgets.
net.load_df(df['tile-exp'])
net.widget()
fig
# Re-shade the scatter points by the 'CD45' row of the expression table.
# Other rows previously tried here: 'CD106', 'CD44', 'NKp46', 'IgD', 'IgM'.
ser_opacity = df['tile-exp'].loc['CD45']

# `Series.get_values()` was removed in pandas 1.0; `tolist()` yields the same
# values. The maximum is computed once rather than once per element.
max_expression = ser_opacity.max()

if max_expression > 0:
    list_opacity = [float(x / max_expression) for x in ser_opacity.tolist()]
else:
    # No positive maximum to scale by: make every point fully transparent
    # instead of dividing by zero.
    list_opacity = [0.0] * len(ser_opacity)

scatter.default_opacities = list_opacity